If you haven’t already, install these packages (you don’t have to do this every time), so if you need to, uncomment the line below and run it.
#install.packages(c("tidyverse","knitr", "Hmisc"))
Then you load them with the “library” command. Confusingly, when you load the tidyverse library, some of its sub-libraries automatically load, and others need to be separately loaded (e.g. broom).
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.5
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(knitr)
library(broom)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
# some useful settings (options) ------------------------------------
options(tibble.width = 300,
dplyr.width = 300)
# these make datasets easier to see when they get displayed on screen
# later, you can mess with them and see what they do if you want.
Reminder: how to get help from R. Put a question mark in front of a function or built-in/loaded dataset, and help will appear!
?mean
?diamonds
#(mean is a function, diamonds is a dataset)
You can get in-line help by pressing tab as you go: R will autocomplete what you’re typing within the function it will give you hints about the arguments the function takes try it out by typing mean( in the console below and then hitting tab.
you probably already read in the data in the intro script, but if you’re just jumping in
ma_data <- read_csv("datasets/mental_abacus_data.csv")
## Parsed with column specification:
## cols(
## subid = col_character(),
## class_num = col_character(),
## grade = col_character(),
## group = col_character(),
## year = col_integer(),
## ravens = col_integer(),
## gonogo = col_double(),
## swm = col_double(),
## pvAvg = col_double(),
## arithmeticTotal = col_integer(),
## arithmeticAverage = col_double(),
## woodcockTotal = col_integer()
## )
ps_data <- read_csv("datasets/pragmatic_scales_data.csv")
## Parsed with column specification:
## cols(
## subid = col_character(),
## item = col_character(),
## correct = col_integer(),
## age = col_double(),
## condition = col_character()
## )
remember, you can use summary, glimpse, and View to remind yourself what these data files look like (always good to be very careful with that!)
summary(ma_data)
## subid class_num grade
## Length:328 Length:328 Length:328
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## group year ravens gonogo
## Length:328 Min. :2015 Min. : 0.00 Min. :0.2769
## Class :character 1st Qu.:2015 1st Qu.: 6.00 1st Qu.:0.7231
## Mode :character Median :2016 Median :10.00 Median :0.8154
## Mean :2016 Mean :11.09 Mean :0.7899
## 3rd Qu.:2016 3rd Qu.:15.00 3rd Qu.:0.8692
## Max. :2016 Max. :25.00 Max. :0.9923
## NA's :1
## swm pvAvg arithmeticTotal arithmeticAverage
## Min. :1.125 Min. :0.0000 Min. : 0.000 Min. :0.0000
## 1st Qu.:2.767 1st Qu.:0.0900 1st Qu.: 4.000 1st Qu.:0.0800
## Median :3.777 Median :0.3600 Median : 8.000 Median :0.1700
## Mean :3.818 Mean :0.3649 Mean : 8.419 Mean :0.1751
## 3rd Qu.:4.753 3rd Qu.:0.6400 3rd Qu.:12.000 3rd Qu.:0.2500
## Max. :8.714 Max. :1.0000 Max. :29.000 Max. :0.6000
## NA's :4 NA's :1 NA's :1 NA's :1
## woodcockTotal
## Min. : 0.0
## 1st Qu.: 8.0
## Median :11.0
## Mean :10.5
## 3rd Qu.:13.0
## Max. :20.0
##
# View(ps_data)
glimpse(ma_data)
## Observations: 328
## Variables: 12
## $ subid <chr> "S1-02-03", "S1-02-03", "S1-02-08", "S1-02-08", "S1-02-17", "S1-02-17", "S1-03-05", "S1-03-05", "S1-03-14", "S1-03-14", "S1-03-15", "S1-03-15", "S1-04-01", "S1-04-01", "S1-04-03", "S1-04-03", "S1-04-04", "S1-04-04", "S1-04-07", "S1-04-07", "S1-04-08", "S1-04-08", "S1-0...
## $ class_num <chr> "S1_02", "S1_02", "S1_02", "S1_02", "S1_02", "S1_02", "S1_03", "S1_03", "S1_03", "S1_03", "S1_03", "S1_03", "S1_04", "S1_04", "S1_04", "S1_04", "S1_04", "S1_04", "S1_04", "S1_04", "S1_04", "S1_04", "S1_04", "S1_04", "S1_04", "S1_04", "S1_04", "S1_04", "S1_07", "S1_07",...
## $ grade <chr> "first grade", "first grade", "first grade", "first grade", "first grade", "first grade", "first grade", "first grade", "first grade", "first grade", "first grade", "first grade", "first grade", "first grade", "first grade", "first grade", "first grade", "first grade",...
## $ group <chr> "CNTL", "CNTL", "CNTL", "CNTL", "CNTL", "CNTL", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "MA", "CNTL", "CNTL", "CNTL", "CNTL", "CNTL", "CNTL", "CNTL", "CNTL", "CNTL", "CNTL", "CNTL", "...
## $ year <int> 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015, 2016, 2015,...
## $ ravens <int> 3, 15, 4, 12, 5, 10, 8, 18, 8, 8, 10, 14, 9, 21, 7, 4, 6, 6, 13, 7, 8, 15, 8, 25, 17, 21, 4, 15, 9, 12, 4, 3, 0, 10, 7, 22, 9, 10, 5, 2, 10, 14, 12, 15, 12, 9, 3, 6, 2, 4, 4, 4, 4, 11, 13, 4, 5, 8, 7, 4, 7, 9, 3, 5, 8, 18, 6, 11, 6, 7, 5, 9, 9, 12, 7, 3, 8, 2, 13, 3, 7...
## $ gonogo <dbl> 0.8153846, 0.7076923, 0.7615385, 0.7076923, 0.4307692, 0.7307692, 0.6230769, 0.5615385, 0.7615385, 0.7846154, 0.3615385, 0.7307692, 0.9307692, 0.8615385, 0.7384615, 0.8307692, 0.7230769, 0.7923077, 0.7786260, 0.7307692, 0.8692308, 0.8538462, 0.7538462, 0.8384615, 0.538...
## $ swm <dbl> 1.833333, 2.125000, 1.625000, 1.375000, 1.500000, 4.375000, 2.818182, 5.208333, 3.083333, 3.583333, 1.125000, 2.791667, 4.375000, 3.583333, 3.437500, 4.500000, 2.416667, 5.600000, NA, 3.625000, 2.750000, 4.312500, 1.416667, 3.255319, 4.117647, 5.083333, 3.409091, 5.777...
## $ pvAvg <dbl> 0.00, 0.36, 0.00, 0.36, 0.09, NA, 0.00, 0.91, 0.00, 0.27, 0.00, 0.27, 0.00, 0.82, 0.00, 0.36, 0.00, 0.18, 0.00, 0.27, 0.00, 0.36, 0.00, 0.09, 0.09, 0.45, 0.00, 0.18, 0.00, 1.00, 0.00, 0.18, 0.00, 0.36, 0.27, 0.36, 0.18, 0.18, 0.00, 0.36, 0.09, 0.09, 0.00, 0.09, 0.00, 0...
## $ arithmeticTotal <int> 0, 12, 0, 4, 8, NA, 4, 16, 3, 8, 0, 4, 0, 13, 3, 10, 3, 6, 1, 4, 3, 6, 2, 5, 4, 8, 2, 12, 3, 15, 2, 7, 1, 5, 4, 8, 3, 4, 2, 5, 3, 9, 3, 8, 3, 9, 0, 6, 1, 9, 0, 13, 2, 12, 3, 13, 3, 10, 3, 10, 3, 12, 3, 9, 4, 13, 2, 3, 3, 5, 3, 6, 1, 6, 0, 5, 1, 4, 1, 14, 0, 4, 2, 5, 1,...
## $ arithmeticAverage <dbl> 0.00, 0.25, 0.00, 0.08, 0.17, NA, 0.08, 0.33, 0.06, 0.17, 0.00, 0.08, 0.00, 0.27, 0.06, 0.21, 0.06, 0.13, 0.02, 0.08, 0.06, 0.13, 0.04, 0.10, 0.08, 0.17, 0.04, 0.25, 0.06, 0.31, 0.04, 0.15, 0.02, 0.10, 0.08, 0.17, 0.06, 0.08, 0.04, 0.10, 0.06, 0.19, 0.06, 0.17, 0.06, 0...
## $ woodcockTotal <int> 2, 15, 3, 9, 9, 10, 5, 13, 6, 9, 5, 11, 0, 10, 5, 11, 5, 7, 4, 12, 7, 11, 5, 10, 7, 13, 4, 11, 4, 13, 4, 11, 3, 8, 5, 8, 7, 10, 3, 10, 6, 8, 5, 11, 5, 11, 3, 11, 4, 9, 4, 11, 5, 12, 9, 12, 5, 11, 5, 10, 5, 11, 5, 13, 5, 13, 7, 10, 5, 10, 5, 9, 8, 12, 1, 8, 8, 8, 5, 13,...
let’s just make 2 simple aggregated version of this dataset, by subj & items
dyplrps_data_bysubj_cond <- ps_data %>% #take your dataset
group_by(subid, condition) %>% #retain subject and condition, collapse everything else, i.e. item
summarise(mean_corr = mean(correct, na.rm = TRUE),#create mean for each subj,cond
sum_corr = sum(correct, na.rm = TRUE))# create sum for each subj,cond
ps_data_byitem_cond <- ps_data %>% #take your dataset
group_by(item, condition) %>% #retain subject and item, collapse everything else, i.e. subj
summarise(mean_corr = mean(correct, na.rm = TRUE),#create mean for each item,cond
sum_corr = sum(correct, na.rm = TRUE))# create sum for each item,cond
protip: commenting what every single line does is great practice when you’re stuck!
Check out iris.
?iris
Let’s plot.
ggplot(data = iris)+
geom_point(mapping = aes(x=Sepal.Length,
y =Species))
Now use the same approach on ps_data.
ggplot(data = ps_data_byitem_cond)+
geom_point(mapping = aes(x=condition,
y =sum_corr))
ps_data_byitem_cond
## # A tibble: 8 x 4
## # Groups: item [?]
## item condition mean_corr sum_corr
## <chr> <chr> <dbl> <int>
## 1 beds Label 0.800 60
## 2 beds No Label 0.208 15
## 3 faces Label 0.653 49
## 4 faces No Label 0.181 13
## 5 houses Label 0.547 41
## 6 houses No Label 0.167 12
## 7 pasta Label 0.733 55
## 8 pasta No Label 0.250 18
What’s wrong with this graph?
ggplot(data = ma_data)+
geom_point(mapping = aes(x=grade,
y =arithmeticAverage))
## Warning: Removed 1 rows containing missing values (geom_point).
ma_data
## # A tibble: 328 x 12
## subid class_num grade group year ravens gonogo swm pvAvg
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 S1-02-03 S1_02 first grade CNTL 2015 3 0.815 1.83 0.
## 2 S1-02-03 S1_02 first grade CNTL 2016 15 0.708 2.12 0.360
## 3 S1-02-08 S1_02 first grade CNTL 2015 4 0.762 1.62 0.
## 4 S1-02-08 S1_02 first grade CNTL 2016 12 0.708 1.38 0.360
## 5 S1-02-17 S1_02 first grade CNTL 2015 5 0.431 1.50 0.0900
## 6 S1-02-17 S1_02 first grade CNTL 2016 10 0.731 4.38 NA
## 7 S1-03-05 S1_03 first grade MA 2015 8 0.623 2.82 0.
## 8 S1-03-05 S1_03 first grade MA 2016 18 0.562 5.21 0.910
## 9 S1-03-14 S1_03 first grade MA 2015 8 0.762 3.08 0.
## 10 S1-03-14 S1_03 first grade MA 2016 8 0.785 3.58 0.270
## arithmeticTotal arithmeticAverage woodcockTotal
## <int> <dbl> <int>
## 1 0 0. 2
## 2 12 0.250 15
## 3 0 0. 3
## 4 4 0.0800 9
## 5 8 0.170 9
## 6 NA NA 10
## 7 4 0.0800 5
## 8 16 0.330 13
## 9 3 0.0600 6
## 10 8 0.170 9
## # ... with 318 more rows
one thing you should always ask yourself is: how many points,bars, lines should i be seeing?
ggplot(data = ma_data)+
geom_jitter(mapping = aes(x = grade,
y = arithmeticAverage))
## Warning: Removed 1 rows containing missing values (geom_point).
hm, that’s better, but now it feels a little TOO spread out, let’s reign it in
ggplot(data = ma_data)+
geom_jitter(mapping = aes(x=grade,
y =arithmeticAverage),
width = .2,
height = 0)
## Warning: Removed 1 rows containing missing values (geom_point).
ggplot(data = iris)+
geom_point(mapping = aes(x=Sepal.Length,
y =Petal.Width,
color = Species))
ggplot(data = iris)+
geom_point(mapping = aes(x=Sepal.Length,
y =Petal.Width,
alpha = Species))
ggplot(data = iris)+
geom_point(mapping = aes(x=Sepal.Length,
y =Petal.Width,
shape = Species,
alpha = Species))
ggplot(data = iris)+
geom_point(mapping = aes(x=Sepal.Length,
y =Petal.Width,
size = Species))
## Warning: Using size for a discrete variable is not advised.
ggplot(data = iris)+
geom_point(mapping = aes(x=Sepal.Length,
y =Petal.Width),
size = 4)
ggplot(data = iris)+
geom_point(mapping = aes(x=Sepal.Length,
y =Petal.Width))+
facet_wrap(facets = ~Species)
geom_line graph (we refer to this graph below in task 3c)
ggplot(data = ma_data, aes(x= factor(year),#this just makes it treat year as a factor
y= arithmeticAverage,
group = subid))+# group keeps the 'unit' at subid
geom_point()+
geom_line()
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).
geom_hline geom_text
ggplot(data = ps_data_byitem_cond, mapping = aes(x=condition,
y=mean_corr))+
geom_point()+
geom_hline(yintercept = .5)+ #hey, this adds a line!
geom_text(label = "1b", x = .7, y= .2, color = "purple")# this but '1b' in the corner!
the x and y tell it where to put the text, here label is 1 on the x axis
in 1d; we refer to this graph in task 3d
ggplot(data = ma_data, aes(x=gonogo))+
geom_histogram(binwidth=.10)
## Warning: Removed 1 rows containing non-finite values (stat_bin).
in 2d
ggplot(data = ps_data_bysubj_cond, aes(x=condition, y = mean_corr))+
geom_boxplot()
with density info:
ggplot(data = ps_data_bysubj_cond, aes(x=condition, y = mean_corr))+
geom_violin()
with density AND dots!
ggplot(data = ps_data_bysubj_cond, aes(x=condition, y = mean_corr))+
geom_violin()+
geom_jitter(width=.1, height=.01, shape =1)# i like shape #1 for legibility
(and examples of ‘local’ vs. ‘global’ variable setting)
global x and y, color just for geom_point
ggplot(data = ma_data, mapping = aes(x = arithmeticTotal, y = gonogo)) +
geom_point(mapping = aes(color = grade)) +
stat_smooth()
## `geom_smooth()` using method = 'loess'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
all vars global: what’s the difference?
ggplot(data = ma_data, mapping = aes(x = arithmeticTotal, y = gonogo, color = grade)) +
geom_point() +
stat_smooth()
## `geom_smooth()` using method = 'loess'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
filter the data for a layer
ggplot(data = ma_data, mapping = aes(x = arithmeticTotal, y = gonogo, color = grade)) +
geom_point() +
stat_smooth(data = filter(ma_data,grade=="first grade"))# the smoother only gets grade 1 data!
## `geom_smooth()` using method = 'loess'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
take out a class, remove confidence bnd
ggplot(data = ma_data, mapping = aes(x = arithmeticTotal, y = gonogo, color = grade)) +
geom_point() + #the points include everyone
stat_smooth(data = filter(ma_data,group != "MA"),
se = FALSE) # but the smoother doesn't see MA group
## `geom_smooth()` using method = 'loess'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
what does se = FALSE do?
stat_smooth default is loess (local estimator)
ggplot(data = ma_data, mapping = aes(x = arithmeticTotal, y = gonogo)) +
geom_point(aes(color = grade)) +
stat_smooth()
## `geom_smooth()` using method = 'loess'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
but you can make it fit a line
ggplot(data = ma_data, mapping = aes(x = arithmeticTotal, y = gonogo)) +
geom_point(aes(color = grade)) +
stat_smooth( method="lm")
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
mean by condition, no error bars yet
ggplot(data = ps_data, aes(x = condition, y = correct)) +
stat_summary(fun.y=mean,
na.rm=T,
geom = "bar")
barbarplots? [cf twitter]
95% confidence interval
ggplot(data = ps_data, aes(x = condition, y = correct)) +
stat_summary(fun.data = mean_cl_boot, geom = "pointrange") #fun.data, not fun.y!
mean_cl_boot is boostrapped confidence intervals, you can google what regular normal CIs would be!
both:
ggplot(data = ps_data, aes(x = condition, y = correct)) +
stat_summary(fun.y = mean, na.rm=T, geom = "bar")+
stat_summary(fun.data = mean_cl_boot, geom = "pointrange")
same as violin plot above, but now with an errorbar!
ggplot(data = ps_data_bysubj_cond, aes(x=condition, y = mean_corr))+
geom_violin()+
stat_summary(fun.data=mean_cl_normal, geom = "pointrange")
protip: use
fillwith bars not colour!
bonus question: what does colour do for bars?
ggplot(data = ma_data) +
geom_bar(mapping = aes(x = woodcockTotal, fill = grade), position = "fill")
ggplot(data = ma_data) +
geom_bar(mapping = aes(x = woodcockTotal, fill = grade), position = "stack")
ggplot(data = ma_data) +
geom_bar(mapping = aes(x = woodcockTotal, fill = grade), position = "dodge")
ps_data_byitem_cond showing the mean_corr for each condition using geom_bar (hint, you’ll need to use “stat=” insde your geom_bar() callfill, stack, or dodge??ggsave()
ggsave will save your last plot by default, but you can also tell it save a plot you’ve assigned.
mygraph <- ggplot(data = iris)+
geom_point(mapping = aes(x=Sepal.Length,
y =Petal.Width,
color = Species))
mygraph
ggsave("mygraph.pdf",plot = mygraph,dpi = 100)
## Saving 7 x 5 in image
even better than saving your graph: add it to your R Markdown! the awesome thing about using your .Rmd file is that you can render graphs there, and they get saved for you!
there are LOTS of settings you can muck with. (details here https://yihui.name/knitr/options/#plots). we’ll do this back in our .rmd file
hint for group a
ggplot(data = ma_data, aes(x= factor(year),#this just makes it treat year as a factor
y= arithmeticAverage,
group = subid))+# this keeps the 'unit' at subid
geom_point()+
geom_line()+facet_wrap(~grade)+
stat_summary(color = "red", size = 3, geom="line", fun.y=mean, aes(group =grade))
## Warning: Removed 1 rows containing non-finite values (stat_summary).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).
hint 1 for group b
xvar <- c(rnorm(1500, mean = -1), rnorm(1500, mean = 1.5))
yvar <- c(rnorm(1500, mean = 1), rnorm(1500, mean = 1.5))
zvar <- as.factor(c(rep(1, 1500), rep(2, 1500)))
xy <- data.frame(xvar, yvar, zvar)
ggplot(xy, aes(xvar, yvar)) + geom_point() + geom_rug(col = "darkred", alpha = 0.1)
hint 2 for Group b
all the code for this graph appears to be here, BUT this person did not do things the tidy way! https://micahallen.org/2018/03/15/introducing-raincloud-plots/. exercise for the reader: do his wrangling the tidy way!
but using our ps_data_bysubj_cond and sourcing this:
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")
you should be able to make your raincloud:)
hint for group c: here’s a sample dataset and a graph to get you started in the right direction
library(feather)# this is part of tidyverse, but not auto-loaded
feather is a format that’s convenient for various reasons
coart<- read_feather("datasets/coart_test")
summary(coart)
## timeBin FixationID SubjectNumber gaze
## Min. : 0.0 Min. : 1 a01 : 10893 DISTRACTOR: 56765
## 1st Qu.:103.0 1st Qu.:2232 a10 : 10757 TARGET :114850
## Median :203.0 Median :4848 a14 : 10485 NA's : 21748
## Mean :203.9 Mean :4757 a05 : 10446
## 3rd Qu.:303.0 3rd Qu.:7146 a17 : 10418
## Max. :653.0 Max. :9160 a11 : 10380
## (Other):129984
## Trial RT TRIAL_START_TIME audiotarget
## Min. : 1.00 Min. :-1 Min. : 205001 Length:193363
## 1st Qu.: 7.00 1st Qu.:-1 1st Qu.: 1500796 Class :character
## Median :13.00 Median :-1 Median : 6501502 Mode :character
## Mean :12.58 Mean :-1 Mean :15150737
## 3rd Qu.:19.00 3rd Qu.:-1 3rd Qu.:10383920
## Max. :24.00 Max. :-1 Max. :69109516
## NA's :20171
## carrier distractorimage TargetImage
## Length:193363 Length:193363 Length:193363
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## distractorloc targetloc Pair
## Length:193363 Length:193363 Length:193363
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## first second targetwordonset order
## Length:193363 Length:193363 Min. : 0.0 Min. :1.00
## Class :character Class :character 1st Qu.: 690.0 1st Qu.:1.00
## Mode :character Mode :character Median : 828.0 Median :1.00
## Mean : 820.6 Mean :1.49
## 3rd Qu.:1141.0 3rd Qu.:2.00
## Max. :1276.0 Max. :2.00
##
## targetside TrialType Time propt
## Length:193363 cross:57652 Min. : 0 Min. :0.000
## Class :character NA :20171 1st Qu.: 2060 1st Qu.:0.000
## Mode :character same :57508 Median : 4060 Median :1.000
## unfam:58032 Mean : 4077 Mean :0.669
## 3rd Qu.: 6060 3rd Qu.:1.000
## Max. :13060 Max. :1.000
## NA's :21748
## targetnum TargetOnset Nonset prewin longwin
## egg.png : 15128 Min. :2500 Min. :-3760.0 N:115674 N: 86944
## bed.png : 15051 1st Qu.:3190 1st Qu.:-1200.0 Y: 77689 Y:106419
## dog.png : 14589 Median :3328 Median : 780.0
## foot.png: 14575 Mean :3321 Mean : 765.3
## hand.png: 14271 3rd Qu.:3641 3rd Qu.: 2720.0
## crib.png: 14114 Max. :3776 Max. :10560.0
## (Other) :105635
## whichwin_long medwin whichwin_med shortwin whichwin_short
## long :106419 N:116193 med :106419 N:152767 neither: 9255
## neither: 9255 Y: 77170 neither: 9255 Y: 40596 pre : 77689
## pre : 77689 pre : 77689 short :106419
##
##
##
##
## adult_type
## Length:193363
## Class :character
## Mode :character
##
##
##
##
do you know what each of these lines do? can you make errorbars?
ggplot(subset(coart, Nonset<5000 & Nonset>-1500),
aes(Nonset, propt, color = TrialType))+
geom_hline(yintercept=.5)+
ylab("proportion of target looking")+
xlab("time from target onset")+
geom_vline(xintercept=0)+
stat_smooth(geom="point")+
theme_bw(base_size=18)
## `geom_smooth()` using method = 'gam'
## Warning: Removed 17424 rows containing non-finite values (stat_smooth).
if all we wanted to do was add a regression line, we’d just use stat_smooth: note this is like the graph we did with the errorbars above, just edited a little
ggplot(ToothGrowth, aes(x=dose, y=len, colour=supp)) +
stat_summary(fun.y = mean, geom = "point", size = 4) +
geom_point( size = 1)+
stat_smooth(method="lm")
but if we want to know what the actual formula for that line is, we have to calculate some things:
first we need a linear model
ourmodel <- lm(data = ToothGrowth, len~dose*supp)
if you want to know more about the results you do a summary of the model
summary(ourmodel)
##
## Call:
## lm(formula = len ~ dose * supp, data = ToothGrowth)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.2264 -2.8462 0.0504 2.2893 7.9386
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.550 1.581 7.304 1.09e-09 ***
## dose 7.811 1.195 6.534 2.03e-08 ***
## suppVC -8.255 2.236 -3.691 0.000507 ***
## dose:suppVC 3.904 1.691 2.309 0.024631 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.083 on 56 degrees of freedom
## Multiple R-squared: 0.7296, Adjusted R-squared: 0.7151
## F-statistic: 50.36 on 3 and 56 DF, p-value: 6.521e-16
if you want the summary results to look prettier you tidy the model
tidy(ourmodel)
## term estimate std.error statistic p.value
## 1 (Intercept) 11.550000 1.581394 7.303681 1.089558e-09
## 2 dose 7.811429 1.195422 6.534454 2.027753e-08
## 3 suppVC -8.255000 2.236429 -3.691152 5.073393e-04
## 4 dose:suppVC 3.904286 1.690582 2.309433 2.463136e-02
in our case, we can use the results of the model to manually put in a line, but there are fancier ways to do this that are beyond the scope of this tutorial
ggplot(ToothGrowth, aes(x=dose, y=len, colour=supp)) +
stat_summary(fun.y = mean, geom = "point", size = 4) +
geom_point( size = 1)+
stat_smooth(method="lm")+
annotate(x = 1, y = 30, "text", label = "y = 11.55 + 7.8 *dose + -8.26*suppVC + 3.9 * dose * suppVC")
note: for annotate, the x and y is where on the graph you want your text to go
if you want to check this formula, you can plug in some values:
#11.55 + 7.8 *dose + -8.26*suppVC + 3.9 * dose * suppVC
11.55 + 7.8*0 + -8.26*0 + 3.9*0*0 # dose of 0 for oj
## [1] 11.55
11.55 + 7.8*1 + -8.26*0 + 3.9*1*0# dose of 1 for oj
## [1] 19.35
11.55 + 7.8*2 + -8.26*0 + 3.9*2*0# dose of 2 for oj
## [1] 27.15
11.55 + 7.8*0 + -8.26*1 + 3.9*0*1# dose of 0 for vc
## [1] 3.29
11.55 + 7.8*1 + -8.26*1 + 3.9*1*1# dose of 1 for vc
## [1] 14.99
11.55 + 7.8*2 + -8.26*1 + 3.9*2*1# dose of 1 for vc
## [1] 26.69
From: http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
tgc <- ToothGrowth%>%
group_by(supp, dose)%>%
summarise(n = n(),
mean_len = mean(len),
sd_len = sd(len),
se_len = sd_len/sqrt(n),
ci_len = qt((.95/2 +.5),
df= n-1)*se_len) # looking up 95%'s 2 tails in t-dist
ggplot(tgc, aes(x=dose, y=mean_len, colour=supp)) +
geom_errorbar(aes(ymin=mean_len - ci_len, ymax = mean_len + ci_len)) +
geom_line() +
geom_point()
ggplot(ToothGrowth, aes(x=dose, y=len, colour=supp)) +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar") +
stat_summary(fun.y = mean, geom = "point") +
stat_summary(fun.y = mean, geom = "line")